Rapidly generate figures for atlas and groups

To install the kernel used by NERSC-metatlas users, copy the following text to $HOME/.ipython/kernels/mass_spec_cori/kernel.json

{
 "argv": [
  "/global/common/software/m2650/python-cori/bin/python",
  "-m",
  "IPython.kernel",
  "-f",
  "{connection_file}"
 ],
 "env": {
    "PATH": "/global/common/software/m2650/python-cori/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"
 },
 "display_name": "mass_spec_cori",
 "language": "python"
}



In [ ]:

    
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
%matplotlib notebook
import sys
import os
import multiprocessing as mp

#### add a path to your private code if not using production code ####
#sys.path.insert(0,"/global/homes/d/dgct/Repos/metatlas/") #where your private code is
######################################################################

from metatlas.helpers import dill2plots as dp
from metatlas.helpers import fastanalysis as fa
import metatlas.metatlas_objects as metob
from metatlas.helpers import metatlas_get_data_helper_fun as ma_data
# other data tools you might need
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

1. Retrieve atlas by name



In [ ]:

    
# Fetch every stored atlas with this name (username='*' presumably matches
# any owner -- confirm against metob.retrieve).
atlases = metob.retrieve('Atlas', name='test_atlas_positive_mode', username='*')

# List the matches with their index so one can be picked by position in the
# next cell.  Formatting into a single string first makes the output identical
# whether ``print`` is a statement (Python 2, where a parenthesised argument
# list would be shown as a tuple) or a function (Python 3).
for i, a in enumerate(atlases):
    print('%d %s %s' % (i, a.name, pd.to_datetime(a.last_modified, unit='s')))

2. Pick which atlas from the list above.

For example

atlas = atlases[0]

will select the first one

atlas = atlases[1]

will select the second one... and so forth.



In [ ]:

    
# Choose the atlas by its index in the listing printed by the previous cell.
atlas = atlases[0]

# Echo the selection.  ``print(x)`` with a single argument behaves the same as
# a Python 2 statement and as a Python 3 function call, matching the
# ``print(...)`` style used by the other cells.
print(atlas.name)
print(atlas.username)

3. Specify groups and filter by include_list and exclude_list



In [ ]:

    
# Collect the groups to analyse.  Both filter lists are left empty here; to
# filter, pass strings such as ['QC', 'Blank'] (presumably matched against
# group names -- confirm against dp.select_groups_for_analysis).
groups = dp.select_groups_for_analysis(
    name='test_groups_pos',
    most_recent=True,
    remove_empty=True,
    include_list=[],
    exclude_list=[],
)

4. Set output_dir to the directory where your results should be stored



In [ ]:

    
# Location handed to fa.filter_and_output further down; change it to a
# directory you can write to.
output_dir = '/global/homes/b/bpb/Downloads/test_pos'

# Create the directory, including any missing parents, unless the path
# already exists.
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

print('your results will be written to %s' % (output_dir,))

5. Make metatlas dataset



In [ ]:

    
# Build the atlas DataFrame and add a 'label' column holding the name of each
# compound identification in the atlas.
atlas_df = ma_data.make_atlas_df(atlas)
atlas_df['label'] = [cid.name for cid in atlas.compound_identifications]

# One work item per file of every group.  pool.map passes a single argument to
# the worker, so the file, its group, the atlas DataFrame and the atlas are
# bundled into one tuple.
all_files = [(my_file, my_group, atlas_df, atlas)
             for my_group in groups
             for my_file in my_group.items]

if not all_files:
    # mp.Pool(processes=0) would fail with the unhelpful "Number of processes
    # must be at least 1"; report the actual problem instead.
    raise ValueError('no files found in the selected groups; '
                     'check the group selection before making the dataset')

# Use at most 4 worker processes, and never more than there are files.
pool = mp.Pool(processes=min(4, len(all_files)))
try:
    metatlas_dataset = pool.map(ma_data.get_data_for_atlas_df_and_file, all_files)
finally:
    # Always shut the workers down, even when a task raised, so a failed run
    # does not leave worker processes attached to the kernel.  pool.map has
    # already returned (or raised) here, so no results are lost by terminating.
    pool.terminate()
    pool.join()

print('done making metatlas_dataset')

6. Select mz, rt, and msms parameters



In [ ]:

    
###
# Parameter meanings:
# each parameter is compared to best scoring metric for each compound
# across all files
###
# 'min_intensity' <= highest intensity across all files for given compound
# 'rt_tolerance' >= shift of median RT across all files for given compound to reference
# 'mz_tolerance' >= ppm of median mz across all files for given compound relative to reference
# 'min_msms_score' <= highest compound dot-product score across all files for given compound relative to reference
# 'min_num_frag_matches' <= number of matching mzs when calculating max_msms_score
# 'min_relative_frag_intensity' <= ratio of second highest to first highest intensity of matching sample mzs


###
# Loose
###
loose_kwargs = {'min_intensity': 1e3,
                'rt_tolerance': .25,
                'mz_tolerance': 25,
                'min_msms_score': 0.3,
                'allow_no_msms': True,
                'min_num_frag_matches': 1,
                'min_relative_frag_intensity': .01}


###
# Strict
###
strict_kwargs = {'min_intensity': 1e5,
                 'rt_tolerance': .25,
                 'mz_tolerance': 5,
                 'min_msms_score': .6,
                 'allow_no_msms': False,
                 'min_num_frag_matches': 3,
                 'min_relative_frag_intensity': .1}


###
# Selection
###
# The filtering cell below expands ``kwargs``, so it must be defined here or
# that cell fails with a NameError on a fresh kernel.  The loose preset is the
# default; switch to ``dict(strict_kwargs)`` or fill in the custom template.
# A copy is taken so that editing ``kwargs`` leaves the preset untouched.
kwargs = dict(loose_kwargs)

###
# Custom (fill in every value, then uncomment)
###
# kwargs = {'min_intensity': ,
#           'rt_tolerance': ,
#           'mz_tolerance': ,
#           'min_msms_score': ,
#           'allow_no_msms': ,
#           'min_num_frag_matches': ,
#           'min_relative_frag_intensity': }

7. Filter dataset by mz, rt, and msms parameters and output error bars, chromatograms, and identification figures



In [ ]:

    
# ``reload`` is a builtin only on Python 2; on Python 3 it lives in importlib.
try:
    reload
except NameError:
    from importlib import reload

# Re-import fastanalysis so that edits made to the module since it was first
# imported are picked up without restarting the kernel.
fa = reload(fa)

# Split the atlas and the dataset into passing and failing parts using the
# thresholds in ``kwargs``.  output_pass/output_fail presumably control whether
# figures are written under output_dir for each part -- confirm against
# fa.filter_and_output.
pass_atlas_df, fail_atlas_df, pass_dataset, fail_dataset = fa.filter_and_output(
    atlas_df, metatlas_dataset, output_dir,
    output_pass=True, output_fail=True,
    **kwargs)

8. See how many compounds remain



In [ ]:

    
# Report file and compound counts for the unfiltered, passing and failing
# datasets.  The compound count is read from the first file's entry; a dataset
# with no files reports 0 compounds instead of raising IndexError on [0].
for count_template, counted_dataset in (
        ('%d files and %d compounds in original dataset', metatlas_dataset),
        ('%d files and %d compounds in pass_dataset', pass_dataset),
        ('%d files and %d compounds in fail_dataset', fail_dataset)):
    n_files = len(counted_dataset)
    n_compounds = len(counted_dataset[0]) if n_files else 0
    print(count_template % (n_files, n_compounds))

Optional: Save filtered atlas



In [ ]:

    
# ``reload`` is a builtin only on Python 2; on Python 3 it lives in importlib.
try:
    reload
except NameError:
    from importlib import reload

# --- fill these in before running this optional cell ------------------------
# NOTE(review): defaults to the atlas rows that passed filtering -- confirm
# this is the table you want to store.
my_atlas_df = pass_atlas_df
# Name under which the new atlas is stored.
my_atlas_name = ''
# Presumably 'positive' or 'negative' -- confirm against
# dp.make_atlas_from_spreadsheet.
my_polarity = ''
# -----------------------------------------------------------------------------

# store=True is passed below, so only call once both a name and a polarity
# have been provided; this keeps "Run All" from storing a nameless atlas.
if my_atlas_name and my_polarity:
    dp = reload(dp)
    dp.make_atlas_from_spreadsheet(my_atlas_df,
                                   filetype='dataframe',
                                   atlas_name=my_atlas_name,
                                   polarity=my_polarity,
                                   store=True,
                                   mz_tolerance=20)
else:
    print('filtered atlas not saved: set my_atlas_name and my_polarity first')



In [ ]: